/*
 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_opt.h"

#include "hist.h"
#include "zstd_compress_internal.h"

/* Tuning constants for the statistics used by the optimal parser's price functions. */
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats :
                              each literal seen adds this amount to its frequency counter */
#define ZSTD_FREQ_DIV 4    /* log factor when using previous stats to init next stats :
                              counters are right-shifted by (ZSTD_FREQ_DIV + malus) bits */
#define ZSTD_MAX_PRICE (1 << 30)

/* Threshold is inclusive : the first-block heuristic tests `srcSize <= ZSTD_PREDEF_THRESHOLD`. */
#define ZSTD_PREDEF_THRESHOLD                                                                                       \
    1024 /* if srcSize <= ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined \
            distributions */

/*-*************************************
 *  Price functions for optimal parser
 ***************************************/

/* Price accuracy selection.
 * BITCOST_ACCURACY   : number of fractional bits carried by a price.
 * BITCOST_MULTIPLIER : price of exactly one bit, i.e. (1 << BITCOST_ACCURACY).
 * WEIGHT(stat, opt)  : weight of a frequency counter `stat`;
 *                      `opt` selects the evaluation function in the default variant
 *                      (zero => ZSTD_bitWeight(), non-zero => ZSTD_fracWeight()).
 * All variants expose the same two-argument WEIGHT() so that call sites
 * compile unchanged whichever branch is enabled. */
#if 0 /* approximation at bit level */
#define BITCOST_ACCURACY 0
#define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy */
#define BITCOST_ACCURACY 8
#define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#define WEIGHT(stat, opt) ((void)(opt), ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */
#define BITCOST_ACCURACY 8
#define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#define WEIGHT(stat, opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * whole-bit weight of a frequency counter :
 * ZSTD_highbit32(stat + 1), scaled by BITCOST_MULTIPLIER.
 * The +1 keeps the argument of ZSTD_highbit32() non-zero when stat == 0. */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat) {
    U32 const nbBits = ZSTD_highbit32(stat + 1);
    return nbBits * BITCOST_MULTIPLIER;
}

/* ZSTD_fracWeight() :
 * weight of a frequency counter with BITCOST_ACCURACY fractional bits.
 * With stat = rawStat + 1 and hb = ZSTD_highbit32(stat), the result is
 *     hb * BITCOST_MULTIPLIER  +  ((stat << BITCOST_ACCURACY) >> hb)
 * i.e. a whole-bit part plus a linear correction derived from the bits of
 * `stat` below position hb (presumably hb is the index of the highest set
 * bit, making this a piecewise-linear log2 approximation). */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) {
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const wholePart = hb * BITCOST_MULTIPLIER;
    U32 const fracPart = (stat << BITCOST_ACCURACY) >> hb;
    assert(hb + BITCOST_ACCURACY < 31); /* guards the left shift above against losing high bits */
    return wholePart + fracPart;
}

#if (DEBUGLEVEL >= 2)
/* ZSTD_fCost() :
 * debugging helper, only compiled when DEBUGLEVEL >= 2.
 * Converts a price (in 1/BITCOST_MULTIPLIER bit units) into bytes.
 * @return price in bytes as fractional value.
 * For debug messages only. */
MEM_STATIC double ZSTD_fCost(U32 price) {
    double const unitsPerByte = BITCOST_MULTIPLIER * 8;
    return (double)price / unitsPerByte;
}
#endif

/* ZSTD_compressedLiterals() :
 * @return 1 when literals are going to be entropy-compressed,
 *         0 when the literal compression mode is ZSTD_lcm_uncompressed. */
static int ZSTD_compressedLiterals(optState_t const* const optPtr) {
    int const storedRaw = (optPtr->literalCompressionMode == ZSTD_lcm_uncompressed);
    return !storedRaw;
}

/* ZSTD_setBasePrices() :
 * cache WEIGHT(sum) for each symbol family, so that price functions only
 * have to subtract the weight of the individual symbol.
 * The literals base price is only refreshed when literals are compressed. */
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) {
    U32 const llBase = WEIGHT(optPtr->litLengthSum, optLevel);
    U32 const mlBase = WEIGHT(optPtr->matchLengthSum, optLevel);
    U32 const ofBase = WEIGHT(optPtr->offCodeSum, optLevel);

    if (ZSTD_compressedLiterals(optPtr)) {
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    }
    optPtr->litLengthSumBasePrice = llBase;
    optPtr->matchLengthSumBasePrice = mlBase;
    optPtr->offCodeSumBasePrice = ofBase;
}

/* ZSTD_downscaleStat() :
 * divide every element of `table` by 2^(ZSTD_FREQ_DIV+malus), then add 1,
 * so that no element ends up at zero.
 * table        : counters to update in place, indexes [0, lastEltIndex]
 * lastEltIndex : index of the last element (inclusive)
 * malus        : extra shift applied on top of ZSTD_FREQ_DIV
 * @return : the sum of all elements after downscaling */
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) {
    int const shift = ZSTD_FREQ_DIV + malus;
    U32 total = 0;
    U32 n;
    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex + 1);
    assert(ZSTD_FREQ_DIV + malus > 0 && ZSTD_FREQ_DIV + malus < 31);
    for (n = 0; n < lastEltIndex + 1; n++) {
        unsigned const scaled = 1 + (table[n] >> shift);
        table[n] = scaled;
        total += scaled;
    }
    return total;
}

/* ZSTD_rescaleFreqs() :
 * prepare symbol statistics before parsing a block, then refresh base prices.
 * - not the first block (optPtr->litLengthSum != 0) :
 *     downscale the existing statistics, to be used as seed for this block.
 * - first block (optPtr->litLengthSum == 0) :
 *     + small input (srcSize <= ZSTD_PREDEF_THRESHOLD) selects zop_predef pricing,
 *       unless overridden by the dictionary case below;
 *     + when the huffman table is flagged HUF_repeat_valid (presumed generated
 *       by a dictionary), frequencies are derived from the bit costs of the
 *       entropy tables, and pricing is forced to zop_dynamic;
 *     + otherwise literal statistics are counted from `src`,
 *       and match-related symbols all start with a flat frequency of 1.
 */
static void ZSTD_rescaleFreqs(optState_t* const optPtr,
                              const BYTE* const src,
                              size_t const srcSize,
                              int const optLevel) {
    int const useLitStats = ZSTD_compressedLiterals(optPtr);
    int const isFirstBlock = (optPtr->litLengthSum == 0);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (!isFirstBlock) {
        /* new block : re-use previous statistics, scaled down
         * (literals are scaled down by one more bit than the other families) */
        if (useLitStats) {
            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
        }
        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);

        ZSTD_setBasePrices(optPtr, optLevel);
        return;
    }

    /* first block : init */
    if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
        DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
        optPtr->priceType = zop_predef;
    }

    assert(optPtr->symbolCosts != NULL);
    if (optPtr->symbolCosts->huf.repeatMode != HUF_repeat_valid) {
        /* not a dictionary */
        assert(optPtr->litFreq != NULL);
        if (useLitStats) {
            unsigned maxSymbol = MaxLit;
            /* use raw first block to init statistics */
            HIST_count_simple(optPtr->litFreq, &maxSymbol, src, srcSize);
            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
        }

        /* flat distribution for literal lengths */
        {
            unsigned code;
            for (code = 0; code <= MaxLL; code++) {
                optPtr->litLengthFreq[code] = 1;
            }
            optPtr->litLengthSum = MaxLL + 1;
        }

        /* flat distribution for match lengths */
        {
            unsigned code;
            for (code = 0; code <= MaxML; code++) {
                optPtr->matchLengthFreq[code] = 1;
            }
            optPtr->matchLengthSum = MaxML + 1;
        }

        /* flat distribution for offset codes */
        {
            unsigned code;
            for (code = 0; code <= MaxOff; code++) {
                optPtr->offCodeFreq[code] = 1;
            }
            optPtr->offCodeSum = MaxOff + 1;
        }
    } else {
        /* huffman table presumed generated by dictionary :
         * a symbol costing `nbBits` gets frequency 2^(scaleLog - nbBits),
         * a symbol with no cost gets 1 (minimum to calculate cost) */
        optPtr->priceType = zop_dynamic;

        if (useLitStats) {
            U32 const litScaleLog = 11; /* scale to 2K */
            U32 litTotal = 0;
            unsigned sym;
            assert(optPtr->litFreq != NULL);
            for (sym = 0; sym <= MaxLit; sym++) {
                U32 const nbBits = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, sym);
                assert(nbBits <= litScaleLog);
                if (nbBits != 0) {
                    optPtr->litFreq[sym] = 1 << (litScaleLog - nbBits);
                } else {
                    optPtr->litFreq[sym] = 1;
                }
                litTotal += optPtr->litFreq[sym];
            }
            optPtr->litSum = litTotal;
        }

        /* literal lengths */
        {
            U32 const fseScaleLog = 10; /* scale to 1K */
            U32 llTotal = 0;
            unsigned code;
            FSE_CState_t llState;
            FSE_initCState(&llState, optPtr->symbolCosts->fse.litlengthCTable);
            for (code = 0; code <= MaxLL; code++) {
                U32 const nbBits = FSE_getMaxNbBits(llState.symbolTT, code);
                assert(nbBits < fseScaleLog);
                if (nbBits != 0) {
                    optPtr->litLengthFreq[code] = 1 << (fseScaleLog - nbBits);
                } else {
                    optPtr->litLengthFreq[code] = 1;
                }
                llTotal += optPtr->litLengthFreq[code];
            }
            optPtr->litLengthSum = llTotal;
        }

        /* match lengths */
        {
            U32 const fseScaleLog = 10;
            U32 mlTotal = 0;
            unsigned code;
            FSE_CState_t mlState;
            FSE_initCState(&mlState, optPtr->symbolCosts->fse.matchlengthCTable);
            for (code = 0; code <= MaxML; code++) {
                U32 const nbBits = FSE_getMaxNbBits(mlState.symbolTT, code);
                assert(nbBits < fseScaleLog);
                if (nbBits != 0) {
                    optPtr->matchLengthFreq[code] = 1 << (fseScaleLog - nbBits);
                } else {
                    optPtr->matchLengthFreq[code] = 1;
                }
                mlTotal += optPtr->matchLengthFreq[code];
            }
            optPtr->matchLengthSum = mlTotal;
        }

        /* offset codes */
        {
            U32 const fseScaleLog = 10;
            U32 ofTotal = 0;
            unsigned code;
            FSE_CState_t ofState;
            FSE_initCState(&ofState, optPtr->symbolCosts->fse.offcodeCTable);
            for (code = 0; code <= MaxOff; code++) {
                U32 const nbBits = FSE_getMaxNbBits(ofState.symbolTT, code);
                assert(nbBits < fseScaleLog);
                if (nbBits != 0) {
                    optPtr->offCodeFreq[code] = 1 << (fseScaleLog - nbBits);
                } else {
                    optPtr->offCodeFreq[code] = 1;
                }
                ofTotal += optPtr->offCodeFreq[code];
            }
            optPtr->offCodeSum = ofTotal;
        }
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
 * does not include price of literalLength symbol.
 * - uncompressed literals : 8 bits per literal
 * - zop_predef            : flat 6 bits per literal
 * - otherwise             : sum over literals of (litSumBasePrice - WEIGHT(litFreq[literal])) */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals,
                                U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel) {
    if (litLength == 0) {
        return 0;
    }

    if (!ZSTD_compressedLiterals(optPtr)) {
        return (litLength * 8) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */
    }

    if (optPtr->priceType == zop_predef) {
        return (litLength * 6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
    }

    /* dynamic statistics */
    {
        U32 const basePrice = optPtr->litSumBasePrice;
        U32 total = 0;
        U32 pos;
        for (pos = 0; pos < litLength; pos++) {
            U32 const symbolWeight = WEIGHT(optPtr->litFreq[literals[pos]], optLevel);
            assert(symbolWeight <= basePrice); /* literal cost should never be negative */
            total += basePrice - symbolWeight;
        }
        return total;
    }
}

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol.
 * - zop_predef : WEIGHT(litLength), no statistics involved
 * - otherwise  : extra bits of the length code
 *                + (litLengthSumBasePrice - WEIGHT(frequency of the length code)) */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) {
    if (optPtr->priceType != zop_predef) {
        /* dynamic statistics */
        U32 const code = ZSTD_LLcode(litLength);
        U32 const extraBitsPrice = LL_bits[code] * BITCOST_MULTIPLIER;
        U32 const symbolWeight = WEIGHT(optPtr->litLengthFreq[code], optLevel);
        return extraBitsPrice + optPtr->litLengthSumBasePrice - symbolWeight;
    }
    return WEIGHT(litLength, optLevel);
}

/* ZSTD_litLengthContribution() :
 * @return ( cost(litlength) - cost(0) )
 * this value can then be added to rawLiteralsCost()
 * to provide a cost which is directly comparable to a match ending at same position.
 * The result is signed : it can be negative when the code for `litLength`
 * is weighted more heavily than the code for length 0. */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) {
    if (optPtr->priceType >= zop_predef) {
        return (int)WEIGHT(litLength, optLevel);
    }

    /* dynamic statistics */
    {
        U32 const code = ZSTD_LLcode(litLength);
        int const extraBitsPrice = (int)(LL_bits[code] * BITCOST_MULTIPLIER);
        int const zeroWeight = (int)WEIGHT(optPtr->litLengthFreq[0], optLevel); /* note: log2litLengthSum cancel out */
        int const codeWeight = (int)WEIGHT(optPtr->litLengthFreq[code], optLevel);
        int const delta = extraBitsPrice + zeroWeight - codeWeight;
#if 1
        return delta;
#else
        return MAX(0, delta); /* sometimes better, sometimes not ... */
#endif
    }
}

/* ZSTD_literalsContribution() :
 * creates a fake cost for the literals part of a sequence
 * which can be compared to the ending cost of a match
 * should a new match start at this position.
 * @return : ZSTD_rawLiteralsCost() + ZSTD_litLengthContribution(), as a signed value */
static int ZSTD_literalsContribution(const BYTE* const literals,
                                     U32 const litLength,
                                     const optState_t* const optPtr,
                                     int optLevel) {
    int const rawCost = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel);
    int const lengthDelta = ZSTD_litLengthContribution(litLength, optPtr, optLevel);
    return rawCost + lengthDelta;
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with the cost of the literals part to get the full cost of a sequence.
 * offset      : offset value, priced through its code ZSTD_highbit32(offset+1)
 * matchLength : must be >= MINMATCH
 * optLevel    : when <2, favors small offset for decompression speed (improved cache efficiency) */
FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(U32 const offset,
                                             U32 const matchLength,
                                             const optState_t* const optPtr,
                                             int const optLevel) {
    U32 const offCode = ZSTD_highbit32(offset + 1);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef) {
        /* fixed scheme, do not use statistics */
        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
    }

    /* dynamic statistics */
    {
        U32 const mlCode = ZSTD_MLcode(mlBase);
        U32 const offWeight = WEIGHT(optPtr->offCodeFreq[offCode], optLevel);
        U32 const mlWeight = WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel);

        /* offset : extra bits + statistical cost of the offset code */
        U32 total = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - offWeight);
        if ((optLevel < 2) /*static*/ && (offCode >= 20)) {
            /* handicap for long distance offsets, favor decompression speed */
            total += (offCode - 19) * 2 * BITCOST_MULTIPLIER;
        }

        /* match length : extra bits + statistical cost of the length code */
        total += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - mlWeight);

        /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
        total += BITCOST_MULTIPLIER / 5;

        DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, total);
        return total;
    }
}

/* ZSTD_updateStats() :
 * account for one sequence (literals + match) in the running statistics.
 * assumption : literals + litLength <= iend
 * Each literal adds ZSTD_LITFREQ_ADD to its counter (only when literals are
 * compressed); the literal-length, offset and match-length codes each add 1. */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength,
                             const BYTE* literals,
                             U32 offsetCode,
                             U32 matchLength) {
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 pos = 0;
        while (pos < litLength) {
            optPtr->litFreq[literals[pos]] += ZSTD_LITFREQ_ADD;
            pos++;
        }
        optPtr->litSum += litLength * ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {
        U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode] += 1;
        optPtr->litLengthSum += 1;
    }

    /* match offset code (0-2=>repCode; 3+=>offset+2) */
    {
        U32 const offCode = ZSTD_highbit32(offsetCode + 1);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode] += 1;
        optPtr->offCodeSum += 1;
    }

    /* match Length */
    {
        U32 const mlCode = ZSTD_MLcode(matchLength - MINMATCH);
        optPtr->matchLengthFreq[mlCode] += 1;
        optPtr->matchLengthSum += 1;
    }
}

/* ZSTD_readMINMATCH() :
 * function safe only for comparisons :
 * always reads 4 bytes; for length==3, one byte is shifted out
 * (<<8 on little-endian, >>8 otherwise), so the returned value is only
 * meaningful when compared with another value produced the same way.
 * Any length other than 3 is treated as 4.
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) {
    U32 const raw = MEM_read32(memPtr);
    if (length != 3) {
        return raw;
    }
    return MEM_isLittleEndian() ? (raw << 8) : (raw >> 8);
}

/* ZSTD_insertAndFindFirstIndexHash3() :
 * Update hashTable3 up to ip (excluded) :
 * every position in [*nextToUpdate3, ip) is recorded in the table,
 * then *nextToUpdate3 is set to the index of ip.
 * Assumption : always within prefix (i.e. not within extDict)
 * @return : the index currently stored in the table slot matching ip's hash */
static U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_matchState_t* ms, U32* nextToUpdate3, const BYTE* const ip) {
    U32* const table = ms->hashTable3;
    U32 const tableLog = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    size_t const ipHash = ZSTD_hash3Ptr(ip, tableLog);
    U32 pos;
    assert(tableLog > 0);

    for (pos = *nextToUpdate3; pos < target; pos++) {
        table[ZSTD_hash3Ptr(base + pos, tableLog)] = pos;
    }

    *nextToUpdate3 = target;
    return table[ipHash];
}

/*-*************************************
 *  Binary Tree search
 ***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 *  The tree is stored in ms->chainTable as pairs of U32 :
 *  slot 2*(index & btMask) holds the "smaller" child, the next slot the "larger" child.
 *  The root candidate for `ip` is the previous occupant of its hashTable slot.
 *  ms      : match state; its hashTable and chainTable are updated
 *  ip      : position to insert, assumed <= iend-8 .
 *  iend    : end of input, upper limit for match length counting
 *  mls     : nb of bytes used by ZSTD_hashPtr() to hash `ip`
 *  extDict : when non-zero, candidates not yet known to reach dictLimit
 *            are compared through dictBase, across the 2 segments
 * @return : nb of positions added (always >= 1) */
static U32 ZSTD_insertBt1(ZSTD_matchState_t* ms,
                          const BYTE* const ip,
                          const BYTE* const iend,
                          U32 const mls,
                          const int extDict) {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
    U32 const hashLog = cParams->hashLog;
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
    U32* const bt = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask = (1U << btLog) - 1; /* unsigned shift : same form as ZSTD_insertBtAndGetAllMatches() */
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller = 0, commonLengthLarger = 0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 current = (U32)(ip - base);
    const U32 btLow = btMask >= current ? 0 : current - btMask; /* lowest index still covered by the rolling tree */
    U32* smallerPtr = bt + 2 * (current & btMask);
    U32* largerPtr = smallerPtr + 1;
    U32 dummy32; /* to be nullified at the end */
    U32 const windowLow = ms->window.lowLimit;
    U32 matchEndIdx = current + 8 + 1; /* guarantees a return value >= 1 */
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog; /* search budget */
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2 * ((current - 1) & btMask) + 0);
    U32 predictedLarge = *(bt + 2 * ((current - 1) & btMask) + 1);
    predictedSmall += (predictedSmall > 0);
    predictedLarge += (predictedLarge > 0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);

    assert(ip <= iend - 8); /* required for h calculation */
    hashTable[h] = current; /* Update Hash Table */

    assert(windowLow > 0);
    while (nbCompares-- && (matchIndex >= windowLow)) {
        U32* const nextPtr = bt + 2 * (matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
        assert(matchIndex < current);

#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2 * ((matchIndex - 1) & btMask); /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) {
                smallerPtr = &dummy32;
                break;
            }                         /* beyond tree size, stop the search */
            smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1] > 0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) {
                largerPtr = &dummy32;
                break;
            } /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0] > 0);
            continue;
        }
#endif

        /* extend the common length, reading the candidate from the prefix or from the extDict segment */
        if (!extDict || (matchIndex + matchLength >= dictLimit)) {
            assert(matchIndex + matchLength >= dictLimit); /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip + matchLength, match + matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
            if (matchIndex + matchLength >= dictLimit)
                match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
        }

        /* track the longest match, and the farthest position it reaches */
        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip + matchLength == iend) { /* equal : no way to know if inf or sup */
            break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree
                    */
        }

        if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;          /* update smaller idx */
            commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) {
                smallerPtr = &dummy32;
                break;
            }                         /* beyond tree size, stop searching */
            smallerPtr = nextPtr + 1; /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];  /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) {
                largerPtr = &dummy32;
                break;
            } /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
        }
    }

    /* terminate both open branches (either may point to dummy32 at this stage) */
    *smallerPtr = *largerPtr = 0;
    {
        /* nb of positions the caller can skip :
         * at least up to the farthest match end found (minus 8),
         * and, for very long matches (> 384), up to 192 positions */
        U32 positions = 0;
        if (bestLength > 384)
            positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
        assert(matchEndIdx > current + 8);
        return MAX(positions, matchEndIdx - (current + 8));
    }
}

/* ZSTD_updateTree_internal() :
 * insert into the binary tree all positions from ms->nextToUpdate up to ip (excluded).
 * Each call to ZSTD_insertBt1() reports how many positions to move forward,
 * so some positions may be skipped.
 * On return, ms->nextToUpdate is set to the index of ip. */
FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(ZSTD_matchState_t* ms,
                              const BYTE* const ip,
                              const BYTE* const iend,
                              const U32 mls,
                              const ZSTD_dictMode_e dictMode) {
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    int const useExtDict = (dictMode == ZSTD_extDict);
    U32 pos = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)", pos, target, dictMode);

    while (pos < target) {
        U32 const step = ZSTD_insertBt1(ms, base + pos, iend, mls, useExtDict);
        assert(pos < (U32)(pos + step)); /* must progress, without wrapping around */
        pos += step;
    }
    /* indexes must fit within 32 bits */
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

/* ZSTD_updateTree() :
 * update the binary tree up to ip (excluded),
 * using the match state's own minMatch, in ZSTD_noDict mode. */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    U32 const mls = ms->cParams.minMatch;
    ZSTD_updateTree_internal(ms, ip, iend, mls, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches(
    ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
    ZSTD_matchState_t* ms,
    U32* nextToUpdate3,
    const BYTE* const ip,
    const BYTE* const iLimit,
    const ZSTD_dictMode_e dictMode,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
    const U32 lengthToBeat,
    U32 const mls /* template */) {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM - 1);
    const BYTE* const base = ms->window.base;
    U32 const current = (U32)(ip - base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls == 3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex = hashTable[h];
    U32* const bt = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask = (1U << btLog) - 1;
    size_t commonLengthSmaller = 0, commonLengthLarger = 0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2 * (current & btMask);
    U32* largerPtr = bt + 2 * (current & btMask) + 1;
    U32 matchEndIdx = current + 8 + 1; /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;                       /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams = dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit
                             ? dmsHighLimit - dmsBtMask
                             : dmsLowLimit;

    size_t bestLength = lengthToBeat - 1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);

    /* check repCode */
    assert(ll0 <= 1); /* necessarily 1 or 0 */
    {
        U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode == ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = current - repOffset;
            U32 repLen = 0;
            assert(current >= dictLimit);
            if (repOffset - 1 /* intentional overflow, discards 0 and -1 */ < current
                                                                                  - dictLimit) { /* equivalent to
                                                                                                    `current > repIndex
                                                                                                    >= dictLimit` */
                if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
                    repLen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repOffset, iLimit) + minMatch;
                }
            } else { /* repIndex < dictLimit || repIndex >= current */
                const BYTE* const repMatch =
                    dictMode == ZSTD_dictMatchState ? dmsBase + repIndex - dmsIndexDelta : dictBase + repIndex;
                assert(current >= windowLow);
                if (dictMode == ZSTD_extDict
                    && (((repOffset - 1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current >
                                                                                            repIndex >= windowLow` */
                        & (((U32)((dictLimit - 1) - repIndex)
                            >= 3)) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
                    repLen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iLimit, dictEnd, prefixStart)
                             + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                    && (((repOffset - 1) /*intentional overflow*/ < current
                                                                        - (dmsLowLimit
                                                                           + dmsIndexDelta)) /* equivalent to `current >
                                                                                                repIndex >= dmsLowLimit`
                                                                                              */
                        & ((U32)((dictLimit - 1) - repIndex)
                           >= 3)) /* intentional overflow : do not test positions overlapping 2 memory segments */
                    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
                    repLen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iLimit, dmsEnd, prefixStart)
                             + minMatch;
                }
            }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = repCode - ll0;
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ((repLen > sufficient_len) | (ip + repLen == iLimit)) { /* best possible */
                    return mnum;
                }
            }
        }
    }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
            & (current - matchIndex3 < (1 << 18)) /*heuristic : longer distance likely too expensive*/) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/
                || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u", (U32)mlen);
                bestLength = mlen;
                assert(current > matchIndex3);
                assert(mnum == 0); /* no prior solution */
                matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ((mlen > sufficient_len) | (ip + mlen == iLimit)) { /* best possible length */
                    ms->nextToUpdate = current + 1;                    /* skip insertion */
                    return 1;
                }
            }
        }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }

    hashTable[h] = current; /* Update Hash Table */

    while (nbCompares-- && (matchIndex >= matchLow)) {
        U32* const nextPtr = bt + 2 * (matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
        assert(current > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex + matchLength >= dictLimit)) {
            assert(matchIndex + matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit)
                assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip + matchLength, match + matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex + matchLength >= dictLimit)
                match = base + matchIndex; /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8,
                     "found match of length %u at distance %u (offCode=%u)",
                     (U32)matchLength,
                     current - matchIndex,
                     current - matchIndex + ZSTD_REP_MOVE);
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ((matchLength > ZSTD_OPT_NUM)
                | (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState)
                    nbCompares = 0; /* break should also skip searching dms */
                break;              /* drop, to preserve bt consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;          /* update smaller idx */
            commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) {
                smallerPtr = &dummy32;
                break;
            }                         /* beyond tree size, stop the search */
            smallerPtr = nextPtr + 1; /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];  /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) {
                largerPtr = &dummy32;
                break;
            } /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
        }
    }

    *smallerPtr = *largerPtr = 0;

    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
            const U32* const nextPtr = dmsBt + 2 * (dictMatchIndex & dmsBtMask);
            size_t matchLength =
                MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex + matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8,
                         "found dms match of length %u at distance %u (offCode=%u)",
                         (U32)matchLength,
                         current - matchIndex,
                         current - matchIndex + ZSTD_REP_MOVE);
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ((matchLength > ZSTD_OPT_NUM)
                    | (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break; /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (dictMatchIndex <= dmsBtLow) {
                break;
            } /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller =
                    matchLength;             /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
            }
        }
    }

    assert(matchEndIdx > current + 8);
    ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
    return mnum;
}

/* ZSTD_BtGetAllMatches() :
 * Brings the binary tree up to date with ZSTD_updateTree_internal(), then
 * dispatches to ZSTD_insertBtAndGetAllMatches() with a literal minimum match
 * length, selected from ms->cParams.minMatch :
 *   3 => 3, 5 => 5, 6 or 7 => 6, any other value => 4.
 * `matches` receives the result (matches found, of increasing size).
 * @return : 0 when `ip` lies before ms->nextToUpdate (skipped area),
 *           otherwise the value returned by ZSTD_insertBtAndGetAllMatches().
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_BtGetAllMatches(ZSTD_match_t* matches,
                     ZSTD_matchState_t* ms,
                     U32* nextToUpdate3,
                     const BYTE* ip,
                     const BYTE* const iHighLimit,
                     const ZSTD_dictMode_e dictMode,
                     const U32 rep[ZSTD_REP_NUM],
                     U32 const ll0,
                     U32 const lengthToBeat) {
    U32 const mls = ms->cParams.minMatch;
    DEBUGLOG(8, "ZSTD_BtGetAllMatches");

    /* skipped area : this position is not searched */
    if (ip < ms->window.base + ms->nextToUpdate) {
        return 0;
    }

    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);

    /* the last argument is kept as a literal at every call site */
    if (mls == 3) {
        return ZSTD_insertBtAndGetAllMatches(
            matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
    }
    if (mls == 5) {
        return ZSTD_insertBtAndGetAllMatches(
            matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
    }
    if ((mls == 6) || (mls == 7)) {
        return ZSTD_insertBtAndGetAllMatches(
            matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
    }
    /* 4, and every other value */
    return ZSTD_insertBtAndGetAllMatches(
        matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
}

/*-*******************************
 *  Optimal parser
 *********************************/
/* repcodes_t :
 * by-value container for a set of 3 repeat offsets,
 * used as the return type of ZSTD_updateRep(). */
typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

/* ZSTD_updateRep() :
 * Computes the repeat-offset history that results from applying `offset`
 * (an offset code) on top of the history `rep`.
 * ll0 : 1 when the sequence carries no literals, 0 otherwise; it is added
 *       to the repcode index.
 * @return : the new history, by value. `rep` itself is left untouched.
 */
static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) {
    repcodes_t updated;

    /* full offset : becomes the most recent entry, older ones shift down */
    if (offset >= ZSTD_REP_NUM) {
        updated.rep[0] = offset - ZSTD_REP_MOVE;
        updated.rep[1] = rep[0];
        updated.rep[2] = rep[1];
        return updated;
    }

    /* repcode */
    {
        U32 const repIdx = offset + ll0;
        if (repIdx == 0) {
            /* history is unchanged */
            memcpy(&updated, rep, sizeof(updated));
            return updated;
        }
        /* index == ZSTD_REP_NUM designates (rep[0] - 1) */
        updated.rep[0] = (repIdx == ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repIdx];
        updated.rep[1] = rep[0];
        updated.rep[2] = (repIdx >= 2) ? rep[1] : rep[2];
    }
    return updated;
}

/* ZSTD_totalLen() :
 * @return : number of bytes covered by `sol`, literals plus match. */
static U32 ZSTD_totalLen(ZSTD_optimal_t sol) {
    U32 const coveredBytes = sol.litlen + sol.mlen;
    return coveredBytes;
}

#if 0 /* debug */

/* listStats() :
 * debug helper, compiled out by the surrounding `#if 0`.
 * Prints table[0..lastEltID] through RAWLOG (level 2), comma-separated,
 * then ends the line.
 */
static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        //RAWLOG(2, "%3i:%3i,  ", enb, table[enb]);
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif

/* ZSTD_compressBlock_opt_generic() :
 * Optimal-parser block compressor.
 * For each position where ZSTD_BtGetAllMatches() reports at least one match,
 * a "series" is started : the price table `opt[]` (ms->opt.priceTable) records,
 * for every relative position reached so far, the cheapest known way to get
 * there (one more literal, or a match ending there). The series is extended
 * forward position by position, then walked backward from its last position
 * (label _shortestPath) and the selected sequences are emitted into `seqStore`
 * through ZSTD_storeSeq().
 * A series is cut short, and its last match encoded immediately, when a match
 * longer than `sufficient_len` is found, or when it would reach ZSTD_OPT_NUM.
 *
 * rep      : repeat-offset history; updated in place as sequences are stored.
 * optLevel : asserted <= 2. Value 0 additionally enables two speed shortcuts
 *            (skipping unpromising positions, early abort of price updates).
 * dictMode : forwarded to ZSTD_BtGetAllMatches().
 * @return  : size of the last literals, i.e. the number of bytes between the
 *            end of the last stored sequence and the end of `src`.
 */
FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                                            seqStore_t* seqStore,
                                                            U32 rep[ZSTD_REP_NUM],
                                                            const void* src,
                                                            size_t srcSize,
                                                            const int optLevel,
                                                            const ZSTD_dictMode_e dictMode) {
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart; /* start of the literals not yet attached to a stored sequence */
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8; /* no match search is started beyond this point */
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM - 1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastSequence;

    /* init */
    DEBUGLOG(5,
             "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
             (U32)(ip - base),
             ms->window.dictLimit,
             ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    /* skip the first byte when the block starts exactly at prefixStart
     * (presumably because nothing precedes it to match against - to be confirmed) */
    ip += (ip == prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {
            U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 const nbMatches =
                ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
            if (!nbMatches) {
                ip++;
                continue;
            }

            /* initialize opt[0] */
            {
                U32 i;
                for (i = 0; i < ZSTD_REP_NUM; i++)
                    opt[0].rep[i] = rep[i];
            }
            opt[0].mlen = 0; /* means is_a_literal */
            opt[0].litlen = litlen;
            opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);

            /* large match -> immediate encoding */
            {
                /* the last entry of matches[] is treated as the longest one */
                U32 const maxML = matches[nbMatches - 1].len;
                U32 const maxOffset = matches[nbMatches - 1].off;
                DEBUGLOG(6,
                         "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
                         nbMatches,
                         maxML,
                         maxOffset,
                         (U32)(ip - prefixStart));

                if (maxML > sufficient_len) {
                    lastSequence.litlen = litlen;
                    lastSequence.mlen = maxML;
                    lastSequence.off = maxOffset;
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding", maxML, sufficient_len);
                    cur = 0;
                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
                }
            }

            /* set prices for first matches starting position == 0 */
            {
                U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
                }
                /* `pos` keeps increasing across matches : each match only prices
                 * the lengths not already covered by the previous (shorter) ones */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
                    for (; pos <= end; pos++) {
                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
                        U32 const sequencePrice = literalsPrice + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f", pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offset;
                        opt[pos].litlen = litlen;
                        opt[pos].price = sequencePrice;
                        ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
                        memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
                    }
                }
                last_pos = pos - 1; /* `pos` stopped one past the last priced position */
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr - istart, cur)

            /* Fix current position with one literal if cheaper */
            {
                /* price of reaching `cur` from `cur-1` with one more literal :
                 * cost of that literal, plus the change in literal-length cost */
                U32 const litlen = (opt[cur - 1].mlen == 0) ? opt[cur - 1].litlen + 1 : 1;
                int const price = opt[cur - 1].price + ZSTD_rawLiteralsCost(ip + cur - 1, 1, optStatePtr, optLevel)
                                  + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                  - ZSTD_litLengthPrice(litlen - 1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7,
                             "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                             inr - istart,
                             cur,
                             ZSTD_fCost(price),
                             ZSTD_fCost(opt[cur].price),
                             litlen,
                             opt[cur - 1].rep[0],
                             opt[cur - 1].rep[1],
                             opt[cur - 1].rep[2]);
                    opt[cur].mlen = 0;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                    memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(opt[cur].rep));
                } else {
                    DEBUGLOG(7,
                             "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                             inr - istart,
                             cur,
                             ZSTD_fCost(price),
                             ZSTD_fCost(opt[cur].price),
                             opt[cur].rep[0],
                             opt[cur].rep[1],
                             opt[cur].rep[2]);
                }
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit)
                continue;

            /* last position of the series : nothing left to extend */
            if (cur == last_pos)
                break;

            if ((optLevel == 0) /*static_test*/
                && (opt[cur + 1].price <= opt[cur].price + (BITCOST_MULTIPLIER / 2))) {
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur + 1);
                continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            {
                /* ll0 is set when position `cur` was reached by a match, i.e. a
                 * sequence starting here would carry no literals */
                U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 const nbMatches =
                    ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                U32 matchNb;
                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {
                    U32 const maxML = matches[nbMatches - 1].len;
                    DEBUGLOG(7,
                             "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
                             inr - istart,
                             cur,
                             nbMatches,
                             maxML);

                    /* long enough match, or price table about to be exceeded :
                     * stop the series here and encode this match as its last sequence */
                    if ((maxML > sufficient_len) || (cur + maxML >= ZSTD_OPT_NUM)) {
                        lastSequence.mlen = maxML;
                        lastSequence.off = matches[nbMatches - 1].off;
                        lastSequence.litlen = litlen;
                        cur -= (opt[cur].mlen == 0)
                                   ? opt[cur].litlen
                                   : 0; /* last sequence is actually only literals, fix cur to last match - note : may
                                           underflow, in which case, it's first sequence, and it's okay */
                        last_pos = cur + ZSTD_totalLen(lastSequence);
                        if (cur > ZSTD_OPT_NUM)
                            cur = 0; /* underflow => first match */
                        goto _shortestPath;
                    }
                }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
                    U32 const lastML = matches[matchNb].len;
                    /* lengths up to the previous match's length are priced with that match */
                    U32 const startML = (matchNb > 0) ? matches[matchNb - 1].len + 1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7,
                             "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
                             matchNb,
                             matches[matchNb].off,
                             lastML,
                             litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7,
                                     "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                     pos,
                                     mlen,
                                     ZSTD_fCost(price),
                                     ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) {
                                opt[last_pos + 1].price = ZSTD_MAX_PRICE;
                                last_pos++;
                            } /* fill empty positions */
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                            ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
                            memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
                        } else {
                            DEBUGLOG(7,
                                     "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                     pos,
                                     mlen,
                                     ZSTD_fCost(price),
                                     ZSTD_fCost(opt[pos].price));
                            if (optLevel == 0)
                                break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
                    }
                }
            }
        } /* for (cur = 1; cur <= last_pos; cur++) */

        /* series ended normally : its last sequence is the one recorded at last_pos */
        lastSequence = opt[last_pos];
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence)
                                                     : 0; /* single sequence, and it starts before `ip` */
        assert(cur < ZSTD_OPT_NUM);                       /* control overflow*/

    _shortestPath: /* cur, last_pos and lastSequence (litlen, mlen, off) have to be set */
        assert(opt[0].mlen == 0);

        {
            U32 const storeEnd = cur + 1;
            U32 storeStart = storeEnd;
            U32 seqPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", last_pos, cur);
            (void)last_pos;
            assert(storeEnd < ZSTD_OPT_NUM);
            DEBUGLOG(6,
                     "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                     storeEnd,
                     lastSequence.litlen,
                     lastSequence.mlen,
                     lastSequence.off);
            opt[storeEnd] = lastSequence;
            /* walk the chain backward from `cur`, jumping by each sequence's total
             * length, and pack the visited entries just below storeEnd so that
             * [storeStart, storeEnd] holds the selected sequences in forward order */
            while (seqPos > 0) {
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
                storeStart--;
                DEBUGLOG(6,
                         "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                         seqPos,
                         storeStart,
                         opt[seqPos].litlen,
                         opt[seqPos].mlen,
                         opt[seqPos].off);
                opt[storeStart] = opt[seqPos];
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore") {
                U32 storePos;
                for (storePos = storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offCode = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6,
                             "considering seq starting at %zi, llen=%u, mlen=%u",
                             anchor - istart,
                             (unsigned)llen,
                             (unsigned)mlen);

                    if (mlen == 0) { /* only literals => must be last "sequence", actually starting a new stream of
                                        sequences */
                        assert(storePos == storeEnd); /* must be last sequence */
                        ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;           /* will finish */
                    }

                    /* repcodes update : like ZSTD_updateRep(), but update in place */
                    if (offCode >= ZSTD_REP_NUM) { /* full offset */
                        rep[2] = rep[1];
                        rep[1] = rep[0];
                        rep[0] = offCode - ZSTD_REP_MOVE;
                    } else { /* repcode */
                        U32 const repCode = offCode + (llen == 0);
                        if (repCode) { /* note : if repCode==0, no change */
                            U32 const currentOffset = (repCode == ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
                            if (repCode >= 2)
                                rep[2] = rep[1];
                            rep[1] = rep[0];
                            rep[0] = currentOffset;
                        }
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen - MINMATCH);
                    anchor += advance;
                    ip = anchor;
                }
            }
            /* statistics were updated by the stored sequences : refresh base prices */
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }

    } /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

/* ZSTD_compressBlock_btopt() :
 * Runs the optimal parser on `src` with optLevel 0 and ZSTD_noDict.
 * @return : the value returned by ZSTD_compressBlock_opt_generic()
 *           (size of the last literals).
 */
size_t ZSTD_compressBlock_btopt(ZSTD_matchState_t* ms,
                                seqStore_t* seqStore,
                                U32 rep[ZSTD_REP_NUM],
                                const void* src,
                                size_t srcSize) {
    size_t lastLiteralsSize;
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    lastLiteralsSize =
        ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
    return lastLiteralsSize;
}

/* ZSTD_upscaleStat() :
 * used in 2-pass strategy.
 * Replaces every element of table[0..lastEltIndex] by
 * (element << (ZSTD_FREQ_DIV + bonus)) - 1.
 * @return : sum of the updated elements.
 */
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) {
    U32 total = 0;
    U32 idx;
    assert(ZSTD_FREQ_DIV + bonus >= 0);
    for (idx = 0; idx < lastEltIndex + 1; idx++) {
        unsigned const upscaled = (table[idx] << (ZSTD_FREQ_DIV + bonus)) - 1;
        table[idx] = upscaled;
        total += upscaled;
    }
    return total;
}

/* ZSTD_upscaleStats() :
 * used in 2-pass strategy.
 * Applies ZSTD_upscaleStat() (bonus 0) to each frequency table of `optPtr`
 * and stores the returned sums. The literals table is only processed when
 * ZSTD_compressedLiterals(optPtr) is true.
 */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) {
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 const litTotal = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
        optPtr->litSum = litTotal;
    }
    {
        U32 const llTotal = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
        optPtr->litLengthSum = llTotal;
    }
    {
        U32 const mlTotal = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
        optPtr->matchLengthSum = mlTotal;
    }
    {
        U32 const ofTotal = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
        optPtr->offCodeSum = ofTotal;
    }
}

/* ZSTD_initStats_ultra():
 * Runs one throw-away compression pass (optLevel 2, ZSTD_noDict) over `src`,
 * only to collect statistics into ms->opt, then discards what that pass
 * produced : the seqStore is reset and the window is shifted so that the
 * scanned data no longer belongs to the history.
 * The caller's `rep` is not modified : the pass works on a private copy.
 * Preconditions (asserted) : first block, no ldm, no dictionary, no prefix.
 * This function cannot report an error, so its contract must be respected.
 */
static void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                                 seqStore_t* seqStore,
                                 U32 rep[ZSTD_REP_NUM],
                                 const void* src,
                                 size_t srcSize) {
    U32 scratchRep[ZSTD_REP_NUM]; /* repcode updates of the first pass end up here */
    {
        U32 r;
        for (r = 0; r < ZSTD_REP_NUM; r++) {
            scratchRep[r] = rep[r];
        }
    }

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    /* first block */
    assert(ms->opt.litLengthSum == 0);
    /* no ldm */
    assert(seqStore->sequences == seqStore->sequencesStart);
    /* no dictionary */
    assert(ms->window.dictLimit == ms->window.lowLimit);
    /* no prefix (note: intentional overflow, defined as 2-complement) */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);

    /* first pass : generate stats into ms->opt; the returned size is not needed */
    (void)ZSTD_compressBlock_opt_generic(ms, seqStore, scratchRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);

    /* invalidate first scan from history */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->nextToUpdate = ms->window.lowLimit = ms->window.dictLimit;

    /* re-inforce weight of collected statistics */
    ZSTD_upscaleStats(&ms->opt);
}

/* ZSTD_compressBlock_btultra():
 * forwards the block to ZSTD_compressBlock_opt_generic(),
 * selecting optLevel 2 and ZSTD_noDict.
 * @return : the value returned by ZSTD_compressBlock_opt_generic() */
size_t ZSTD_compressBlock_btultra(ZSTD_matchState_t* ms,
                                  seqStore_t* seqStore,
                                  U32 rep[ZSTD_REP_NUM],
                                  const void* src,
                                  size_t srcSize) {
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep,
                                          src, srcSize,
                                          2 /*optLevel*/,
                                          ZSTD_noDict);
}

/* ZSTD_compressBlock_btultra2():
 * same as the optLevel 2 / ZSTD_noDict parser,
 * preceded, when all conditions below are met, by a statistics-seeding pass
 * (ZSTD_initStats_ultra()).
 * @return : the value returned by ZSTD_compressBlock_opt_generic() */
size_t ZSTD_compressBlock_btultra2(ZSTD_matchState_t* ms,
                                   seqStore_t* seqStore,
                                   U32 rep[ZSTD_REP_NUM],
                                   const void* src,
                                   size_t srcSize) {
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-pass strategy:
     * a first pass is run over the first block, only to collect statistics,
     * which are then used to seed the statistics of the real pass.
     * After the 1st pass, the function forgets everything, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded in tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
     * the cost is 2x cpu time on first block. */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    {
        int const firstBlock = (ms->opt.litLengthSum == 0);
        int const noLdm = (seqStore->sequences == seqStore->sequencesStart);
        int const noDictionary = (ms->window.dictLimit == ms->window.lowLimit);
        /* start of frame, nothing already loaded nor skipped */
        int const frameStart = (curr == ms->window.dictLimit);
        int const aboveThreshold = (srcSize > ZSTD_PREDEF_THRESHOLD);

        if (firstBlock && noLdm && noDictionary && frameStart && aboveThreshold) {
            ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
        }
    }

    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep,
                                          src, srcSize,
                                          2 /*optLevel*/,
                                          ZSTD_noDict);
}

/* ZSTD_compressBlock_btopt_dictMatchState():
 * forwards the block to ZSTD_compressBlock_opt_generic(),
 * selecting optLevel 0 and ZSTD_dictMatchState.
 * @return : the value returned by ZSTD_compressBlock_opt_generic() */
size_t ZSTD_compressBlock_btopt_dictMatchState(ZSTD_matchState_t* ms,
                                               seqStore_t* seqStore,
                                               U32 rep[ZSTD_REP_NUM],
                                               const void* src,
                                               size_t srcSize) {
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep,
                                          src, srcSize,
                                          0 /*optLevel*/,
                                          ZSTD_dictMatchState);
}

/* ZSTD_compressBlock_btultra_dictMatchState():
 * forwards the block to ZSTD_compressBlock_opt_generic(),
 * selecting optLevel 2 and ZSTD_dictMatchState.
 * @return : the value returned by ZSTD_compressBlock_opt_generic() */
size_t ZSTD_compressBlock_btultra_dictMatchState(ZSTD_matchState_t* ms,
                                                 seqStore_t* seqStore,
                                                 U32 rep[ZSTD_REP_NUM],
                                                 const void* src,
                                                 size_t srcSize) {
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep,
                                          src, srcSize,
                                          2 /*optLevel*/,
                                          ZSTD_dictMatchState);
}

/* ZSTD_compressBlock_btopt_extDict():
 * forwards the block to ZSTD_compressBlock_opt_generic(),
 * selecting optLevel 0 and ZSTD_extDict.
 * @return : the value returned by ZSTD_compressBlock_opt_generic() */
size_t ZSTD_compressBlock_btopt_extDict(ZSTD_matchState_t* ms,
                                        seqStore_t* seqStore,
                                        U32 rep[ZSTD_REP_NUM],
                                        const void* src,
                                        size_t srcSize) {
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep,
                                          src, srcSize,
                                          0 /*optLevel*/,
                                          ZSTD_extDict);
}

/* ZSTD_compressBlock_btultra_extDict():
 * forwards the block to ZSTD_compressBlock_opt_generic(),
 * selecting optLevel 2 and ZSTD_extDict.
 * @return : the value returned by ZSTD_compressBlock_opt_generic() */
size_t ZSTD_compressBlock_btultra_extDict(ZSTD_matchState_t* ms,
                                          seqStore_t* seqStore,
                                          U32 rep[ZSTD_REP_NUM],
                                          const void* src,
                                          size_t srcSize) {
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep,
                                          src, srcSize,
                                          2 /*optLevel*/,
                                          ZSTD_extDict);
}

/* note : there is no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries,
 * and only applies to the first block (no prefix) */
